# Copyright (c) HySoP 2011-2024
#
# This file is part of HySoP software.
# See "https://particle_methods.gricad-pages.univ-grenoble-alpes.fr/hysop-doc/"
# for further info.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import copy
import numpy as np
from hysop.tools.htypes import check_instance
from hysop.tools.units import bytes2str
from hysop.backend.device.opencl.opencl_types import (
vsizes,
signed_base_types,
unsigned_base_types,
float_base_types,
)
# Lookup table: type -> (multiplier, category).
# The multiplier scales a raw operation count of that type and the category
# ("IOPS" or "FLOPS") names the counter the scaled count is accumulated into.
# Every multiplier below equals the numpy type's size in bytes divided by 4.
dtype_ops = {
    scalar_type: (weight, category)
    for category, weighted_types in (
        (
            "IOPS",
            (
                (np.int8, 0.25),
                (np.int16, 0.50),
                (np.int32, 1.00),
                (np.int64, 2.00),
                (np.uint8, 0.25),
                (np.uint16, 0.50),
                (np.uint32, 1.00),
                (np.uint64, 2.00),
            ),
        ),
        (
            "FLOPS",
            (
                (np.float16, 0.50),
                (np.float32, 1.00),
                (np.float64, 2.00),
                (np.complex64, 2.00),
                (np.complex128, 4.00),
            ),
        ),
    )
    for scalar_type, weight in weighted_types
}
def _fill_dtype_ops():
    """Add the OpenCL type names (scalar and vector) to ``dtype_ops``.

    For every base type name and every vector size in ``vsizes`` an entry
    ``typename -> (multiplier, category)`` is stored, where ``typename`` is
    the base name followed by the vector size (omitted when the size is 1)
    and ``multiplier`` is ``vsize * bytes / 4``. Integer types are filed
    under "IOPS", floating point types under "FLOPS".

    The multiplier uses true division so that types narrower than 4 bytes
    keep their fractional weight, consistent with the numpy entries of
    ``dtype_ops`` (e.g. ``np.int8 -> 0.25``). Floor division would truncate
    those weights to 0.
    """

    def _register(base_types, byte_sizes, category):
        # NOTE(review): assumes each base-type sequence is ordered so that it
        # lines up with the byte sizes passed here -- confirm against
        # opencl_types.
        for base_type, nbytes in zip(base_types, byte_sizes):
            for vsize in vsizes:
                typename = base_type + ("" if vsize == 1 else str(vsize))
                dtype_ops[typename] = (vsize * float(nbytes) / 4, category)

    integer_byte_sizes = (1, 2, 4, 8)
    _register(signed_base_types, integer_byte_sizes, "IOPS")
    _register(unsigned_base_types, integer_byte_sizes, "IOPS")
    _register(float_base_types, (2, 4, 8), "FLOPS")


# Populate the table once, at import time.
_fill_dtype_ops()
class WorkStatistics:
    """Accumulator for memory traffic and per-type operation counts.

    Attributes
    ----------
    global_mem_byte_reads, global_mem_byte_writes:
        Global memory load / store counters (rendered with ``bytes2str``
        in ``__str__``).
    local_mem_byte_reads, local_mem_byte_writes:
        Local memory load / store counters (rendered with ``bytes2str``
        in ``__str__``).
    ops_per_type:
        Dictionary mapping a type key to an operation count.
    """

    def __init__(self, stat=None):
        """Create empty statistics, or copy the counters of ``stat``.

        Parameters
        ----------
        stat: WorkStatistics, optional
            Statistics to copy. It is validated with ``check_instance`` and
            its ``ops_per_type`` dictionary is deep-copied.
        """
        if stat is not None:
            check_instance(stat, WorkStatistics)
            self.global_mem_byte_reads = stat.global_mem_byte_reads
            self.global_mem_byte_writes = stat.global_mem_byte_writes
            self.local_mem_byte_reads = stat.local_mem_byte_reads
            self.local_mem_byte_writes = stat.local_mem_byte_writes
            self.ops_per_type = copy.deepcopy(stat.ops_per_type)
        else:
            self.global_mem_byte_reads = 0
            self.global_mem_byte_writes = 0
            self.local_mem_byte_reads = 0
            self.local_mem_byte_writes = 0
            self.ops_per_type = {}

    def compute_timed_statistics(self, duration):
        """Return a TimedWorkStatistics built from these counters and ``duration``."""
        return TimedWorkStatistics(self, duration)

    def global_mem_transactions(self):
        """Return the sum of global memory writes and reads."""
        return self.global_mem_byte_writes + self.global_mem_byte_reads

    def global_mem_rw_ratio(self):
        """Return global writes divided by total global transactions.

        Raises ZeroDivisionError when no global transaction was recorded;
        check ``has_global_mem_transactions()`` first.
        """
        return float(self.global_mem_byte_writes) / self.global_mem_transactions()

    def global_mem_read_ratio(self):
        """Return global reads divided by total global transactions.

        Raises ZeroDivisionError when no global transaction was recorded.
        """
        return float(self.global_mem_byte_reads) / self.global_mem_transactions()

    def local_mem_transactions(self):
        """Return the sum of local memory writes and reads."""
        return self.local_mem_byte_writes + self.local_mem_byte_reads

    def local_mem_rw_ratio(self):
        """Return local writes divided by total local transactions.

        Raises ZeroDivisionError when no local transaction was recorded;
        check ``has_local_mem_transactions()`` first.
        """
        return float(self.local_mem_byte_writes) / self.local_mem_transactions()

    def local_mem_read_ratio(self):
        """Return local reads divided by total local transactions.

        Raises ZeroDivisionError when no local transaction was recorded.
        """
        return float(self.local_mem_byte_reads) / self.local_mem_transactions()

    def total_mem_transactions(self):
        """Return local plus global memory transactions."""
        return self.local_mem_transactions() + self.global_mem_transactions()

    def has_local_mem_transactions(self):
        """Return True when at least one local memory transaction was recorded."""
        return self.local_mem_transactions() > 0

    def has_global_mem_transactions(self):
        """Return True when at least one global memory transaction was recorded."""
        return self.global_mem_transactions() > 0

    def __add__(self, rhs):
        """Return new statistics holding the counter-wise sum of self and ``rhs``.

        Neither operand is modified: the result starts as a deep copy of
        ``self``. ``rhs`` is validated with ``check_instance``.
        """
        check_instance(rhs, WorkStatistics)
        stats = copy.deepcopy(self)
        stats.global_mem_byte_reads += rhs.global_mem_byte_reads
        stats.global_mem_byte_writes += rhs.global_mem_byte_writes
        stats.local_mem_byte_reads += rhs.local_mem_byte_reads
        stats.local_mem_byte_writes += rhs.local_mem_byte_writes
        for k, v in rhs.ops_per_type.items():
            if k not in stats.ops_per_type:
                stats.ops_per_type[k] = v
            else:
                stats.ops_per_type[k] += v
        return stats

    def __mul__(self, rhs):
        """Return new statistics with every counter multiplied by the int ``rhs``.

        ``self`` is left untouched: the result is a scaled deep copy.
        """
        check_instance(rhs, int)
        stats = copy.deepcopy(self)
        stats.global_mem_byte_reads *= rhs
        stats.global_mem_byte_writes *= rhs
        stats.local_mem_byte_reads *= rhs
        stats.local_mem_byte_writes *= rhs
        # Only existing keys are reassigned, so the dictionary size does not
        # change while it is being iterated.
        for k in stats.ops_per_type.keys():
            stats.ops_per_type[k] *= rhs
        return stats

    def __rmul__(self, lhs):
        """Support ``int * stats`` by delegating to ``__mul__``."""
        check_instance(lhs, int)
        return self.__mul__(lhs)

    def __str__(self):
        """Return a multi-line report of the recorded statistics.

        The global and local memory lines are only emitted when the
        corresponding transactions exist, which also keeps the ratio
        computations away from a division by zero.
        """
        # The leading empty item makes the joined text start with a line break.
        op_count = [""] + [f"{k}: {v}" for (k, v) in self.ops_per_type.items()]
        op_count = "\n ".join(op_count)

        ss = ":: Work Statistics ::"
        if self.has_global_mem_transactions():
            ss += "\n Global memory: load={} store={} total={} rw_ratio={}".format(
                bytes2str(self.global_mem_byte_reads),
                bytes2str(self.global_mem_byte_writes),
                bytes2str(self.global_mem_transactions()),
                round(self.global_mem_rw_ratio(), 2),
            )
        if self.has_local_mem_transactions():
            ss += "\n Local memory: load={} store={} total={} rw_ratio={}".format(
                bytes2str(self.local_mem_byte_reads),
                bytes2str(self.local_mem_byte_writes),
                bytes2str(self.local_mem_transactions()),
                round(self.local_mem_rw_ratio(), 2),
            )
        ss += f"\n Operations count: {op_count}"
        return ss
class TimedWorkStatistics(WorkStatistics):
    """Work statistics associated with a duration.

    On construction the per-type operation counts are folded into
    per-category totals (using the ``dtype_ops`` table) and divided by the
    duration to obtain per-category rates.
    """

    def __init__(self, workstat, duration):
        """Copy the counters of ``workstat`` and derive the timed quantities.

        Parameters
        ----------
        workstat: WorkStatistics
            Statistics to copy.
        duration:
            Value every total is divided by (time unit not specified here).
            A zero duration raises ZeroDivisionError as soon as a rate is
            computed.

        Raises
        ------
        ValueError
            If ``workstat.ops_per_type`` contains a key missing from
            ``dtype_ops``.
        """
        super().__init__(workstat)
        self.duration = duration
        self._init()

    def ops_per_second(self):
        """Return the dictionary mapping an operation category to its rate."""
        return self._ops_per_second

    def ops_per_category(self):
        """Return the dictionary mapping an operation category to its weighted count."""
        return self._ops_per_category

    def global_mem_throughput(self):
        """Return global memory transactions divided by the duration."""
        return self.global_mem_transactions() / self.duration

    def local_mem_throughput(self):
        """Return local memory transactions divided by the duration."""
        return self.local_mem_transactions() / self.duration

    def total_mem_throughput(self):
        """Return total memory transactions divided by the duration."""
        return self.total_mem_transactions() / self.duration

    def _init(self):
        """Compute the per-category operation totals and rates."""
        # Reject unknown types up front, before any accumulation happens.
        for dtype in self.ops_per_type:
            if dtype not in dtype_ops:
                msg = f"unknown type {dtype}, valid types are:\n\t{dtype_ops.keys()}."
                raise ValueError(msg)

        # Weight each raw count by the multiplier of its type and accumulate
        # it into the category ("IOPS" / "FLOPS") given by dtype_ops.
        ops_count = {}
        for dtype, N in self.ops_per_type.items():
            (multiplier, op_category) = dtype_ops[dtype]
            ops_count[op_category] = ops_count.get(op_category, 0.0) + multiplier * N

        ops_per_second = {
            op_category: op_count / self.duration
            for op_category, op_count in ops_count.items()
        }

        self._ops_per_category = ops_count
        self._ops_per_second = ops_per_second